今回だけでggplot2を習得するのは難しいので本当に紹介程度です。 コピペでも全然OKです。 岩嵜先生のとても参考になる資料がよくまとまっています。 資料1 資料2
日本語の資料はいくらでもあるし、英語も含めればなんでもある。
データ可視化は重要。 Rは可視化に強い。特に人気の可視化パッケージであるggplot2を中心に学ぶ。
これでよく論文で見かける図が作れる!matplotlib?seaborn?知らないぁ。悪いがR以外は帰ってくれないか。最近のいけてる論文のfigは結構ggplot2で作成されている ggplot2の説明は難しいのでとにかくコードを実行してたくさん図を作っていきましょう。
イメージはレイヤを重ねていく感じ
基本 ggplot() # 使うデータを指定 aes() # x軸、y軸など指定 geom_() # 棒グラフか、散布図かなど theme_() # 背景や軸の見栄え
library(ggplot2)
head(diamonds) # ダイアモンドのデータ
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
head(mpg) # 自動車の燃費のデータ
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manua… f 21 29 p comp…
## 3 audi a4 2 2008 4 manua… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto(… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto(… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manua… f 18 26 p comp…
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = class))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = "red"))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_wrap(~class)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = class)) +
facet_wrap(~class)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy))
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy, colour = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(aes(colour = class)) +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
##########
ggplot(data = diamonds, mapping = aes(x = cut)) +
geom_bar()
ggplot(data = diamonds, mapping = aes(x = cut, fill = cut)) +
geom_bar()
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
geom_boxplot()
a <- ggplot(diamonds, aes(cut, price, fill = cut)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.08, fill = "white", outlier.size = FALSE) +
theme_light()
a + coord_flip()
diamondsやmpgを使って基本的なプロット geom_boxなど
画像の保存方法 GUIでやるかggsave関数
これ以降は発展的内容
参照 視覚多様性の時代
# viridis theme
data <- data.frame(x = rnorm(10000), y = rnorm(10000))
# viridis
g1 <- ggplot(data, aes(x = x, y = y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c() +
theme_bw()
# magma
g2 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "magma") +
theme_bw()
# inferno
g3 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "inferno") +
theme_bw()
# plasma
g4 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "plasma") +
theme_bw()
# cividis
g5 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "cividis") +
theme_bw()
# default colour of ggplot2
g6 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() + theme_bw()
gridExtra::grid.arrange(g1, g2, g3, g4, g5, g6)
gplotsパッケージのheatmap.2関数を使ってヒートマップ作成
heatmap_2 <- read.table("heatmap_2.txt", header = T, sep = "\t", row.names = 1)
head(heatmap_2)
## GSM995221_AD01M001.CEL.gz GSM995222_AD01M002.CEL.gz
## Il1r2 9.709961 9.499286
## Il1rl1 10.869457 10.595727
## Il18rap 8.661484 8.596499
## Icos 11.556232 11.625647
## Fam129a 10.466855 10.324924
## Rgs16 10.649960 10.630798
## GSM995223_AD01M003.CEL.gz GSM995224_AD01M004.CEL.gz
## Il1r2 8.653889 8.677315
## Il1rl1 9.412765 9.279133
## Il18rap 7.772801 7.761333
## Icos 11.010891 10.862727
## Fam129a 9.845083 9.640511
## Rgs16 10.320202 10.154570
## GSM995225_AD01M005.CEL.gz GSM995226_AD01M006.CEL.gz
## Il1r2 6.766613 7.282546
## Il1rl1 6.696681 6.555741
## Il18rap 6.975710 6.711794
## Icos 8.990778 9.695695
## Fam129a 8.449838 9.240955
## Rgs16 9.282057 9.599733
## GSM995227_AD01M007.CEL.gz GSM995228_AD01M008.CEL.gz
## Il1r2 7.030207 7.114095
## Il1rl1 6.561950 6.557071
## Il18rap 6.987141 6.770077
## Icos 9.515691 9.177716
## Fam129a 8.827236 8.570377
## Rgs16 9.579403 9.278177
colnames(heatmap_2) <- c("1", "2", "3", "4", "5", "6", "7", "8")
head(heatmap_2)
## 1 2 3 4 5 6 7
## Il1r2 9.709961 9.499286 8.653889 8.677315 6.766613 7.282546 7.030207
## Il1rl1 10.869457 10.595727 9.412765 9.279133 6.696681 6.555741 6.561950
## Il18rap 8.661484 8.596499 7.772801 7.761333 6.975710 6.711794 6.987141
## Icos 11.556232 11.625647 11.010891 10.862727 8.990778 9.695695 9.515691
## Fam129a 10.466855 10.324924 9.845083 9.640511 8.449838 9.240955 8.827236
## Rgs16 10.649960 10.630798 10.320202 10.154570 9.282057 9.599733 9.579403
## 8
## Il1r2 7.114095
## Il1rl1 6.557071
## Il18rap 6.770077
## Icos 9.177716
## Fam129a 8.570377
## Rgs16 9.278177
class(heatmap_2)
## [1] "data.frame"
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(viridis)
## Loading required package: viridisLite
# png("heatmap.png", width = 400, height = 800)
heatmap.2(as.matrix(heatmap_2), col = bluered(256), trace = "none", cexCol = 0.9, cexRow = 0.5)
heatmap.2(as.matrix(heatmap_2), col = bluered(256), trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row") # scaling
# Seurat colour
heatmap.2(as.matrix(heatmap_2), col = colorRampPalette(c("#FF00FF", "#000000", "#FFFF00")), trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5)
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "col")
heatmap.2(as.matrix(heatmap_2), col = magma, trace = "none", cexCol = 0.9, cexRow = 0.5)
# scaling
heatmap.2(as.matrix(heatmap_2), col = magma, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = plasma, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = inferno, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = cividis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
同じような作業を繰り返すなら簡単なプログラミングで効率的に この程度ならfor構文で構わない map関数でもできると思う(勉強不足)
for (i in c(viridis, magma, plasma, inferno, cividis)) {
heatmap.2(as.matrix(heatmap_2), col = i, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
}
complexheatmapを用いると複雑なヒートマップ描画できるが今回は割愛
発展 論文でよく見る図を再現してみる MA plot Scatter plot Volcano plot
散布図に遺伝子名を追加ggrepel
seqlogoとか genome browserもいける
地図かける geom_sf()
動画も作れる gganimate